---
title: "Merging Data"
output: html_notebook
---

Merge the three datasets, including the updated versions of DV and CV.

```{r}

library(tidyverse)
library(readstata13)
library(foreign)
library(readxl)
library(writexl)

dv <- read_xlsx("Ortega(2024)-DV_FV-Updated(09162024).xlsx")
iv <- read_xlsx("Ortega(2024)-IV_FV(09302024).xlsx")
cv <- read_xlsx("Ortega(2024)-CV_FV-Updated(09282024).xlsx")

rcs_data <- read_xlsx("TSCS-Colombia(1964-2005).xlsx")

rcs_data <- rcs_data %>%
  select(divipola, year)

dv$year <- as.integer(dv$year)
dv$divipola <- as.integer(dv$divipola)

iv$year <- as.integer(iv$year)
iv$divipola <- as.integer(iv$divipola)

cv$year <- as.integer(cv$year)
cv$divipola <- as.integer(cv$divipola)

m_data_f <- left_join(rcs_data, dv)
m_data_f <- left_join(m_data_f, iv)
m_data_f <- left_join(m_data_f, cv)

m_data_f <- m_data_f %>%
  filter(year > 1984 & year < 2006)

m_data_f <- m_data_f %>%
  rename(updated_pop = updated_pop.x)

# Cleaning DV

m_data_f <- m_data_f %>%
  rename(
    reb_ckill_up1 = reb_ckill,
    reb_ckill_sv_up1 = reb_ckill_sv,
    farc_ckill_up1 = farc_ckill,
    farc_ckill_sv_up1 = farc_ckill_sv,
    eln_ckill_up1 = eln_ckill,
    eln_ckill_sv_up1 = eln_ckill_sv,
    reb_cmass_up1 = reb_cmass,
    reb_cmass_sv_up1 = reb_cmass_sv,
    farc_cmass_up1 = farc_cmass,
    farc_cmass_sv_up1 = farc_cmass_sv,
    eln_cmass_up1 = eln_cmass,
    eln_cmass_sv_up1 = eln_cmass_sv,
    reb_cdv_up1 = reb_cdv,
    reb_cdv_sv_up1 = reb_cdv_sv,
    farc_cdv_up1 = farc_cdv,
    farc_cdv_sv_up1 = farc_cdv_sv,
    eln_cdv_up1 = eln_cdv,
    eln_cdv_sv_up1 = eln_cdv_sv)

m_data_f <- m_data_f %>%
  mutate_at(vars(reb_ckill_up1:eln_cdv_sv_up1)
            , ~replace_na(., 0)) 

m_data_f <- m_data_f %>%
  mutate(dv4b.1.1.reb_ckill_up1_r = (reb_ckill_up1/updated_pop)*100000, 
         dv4b.2.1.farc_ckill_up1_r = (farc_ckill_up1/updated_pop)*100000, 
         dv4b.3.1.eln_ckill_up1_r = (eln_ckill_up1/updated_pop)*100000      
  )


dat_weint <- read_csv("Weintraub(2016)-Replication.csv")
  # Civilians killed by FARC (HROD Presidency of Colombia), based on Weintraub (2016).

dat_weint <- dat_weint %>%
  dplyr::select(divipola, year,
                kciv_FARC, attacks_FARC, indiscrim_FARC)

m_data_f <- left_join(m_data_f, dat_weint)

m_data_f <- m_data_f %>%
  mutate(dv2.1.kciv_FARC_r = (kciv_FARC/updated_pop)*100000)

# Cleaning IV

m_data_f <- m_data_f %>%
  mutate_at(vars(p_strat:v_strat2, cr, cr2)
            , ~replace_na(., 0)) 

m_data_f <- m_data_f %>%
  rename(iv2.p_strat = p_strat,
         iv3.s_strat = s_strat,
         iv4.v_strat = v_strat,
         iv4b.v_strat = v_strat2,
         iv1.1.cr = cr,
         iv1.2.cr = cr2)

m_data_f <- m_data_f %>% ## Creating dummy variables as IVs. Based on type of strategy.
  mutate(
    cv8.2.no_p_str = case_when(
      iv2.p_strat == 0 & iv1.2.cr == 1 ~ 1,
      TRUE ~ 0),
    cv8.3.no_s_str = case_when(
      iv3.s_strat == 0 & iv1.2.cr == 1 ~ 1,
      TRUE ~ 0),
    cv8.4a.no_v_str = case_when( 
      iv4.v_strat == 0 & iv1.1.cr == 1 ~ 1,
      TRUE ~ 0),
    cv8.4b.no_v_str = case_when( 
      iv4b.v_strat == 0 & iv1.2.cr == 1 ~ 1,
      TRUE ~ 0)) 

m_data_f <- m_data_f %>%
  filter(updated_pop != 0)

m_data_f2 <- m_data_f %>%
  dplyr::select(year, # Time ID
                divipola, # Unit ID
                dv4b.2.1.farc_ckill_up1_r, # Outcome V: FARC (OMC - Updated)
                dv4b.3.1.eln_ckill_up1_r, # Outcome V: ELN (OMC - Updated)
                dv2.1.kciv_FARC_r, # Outcome V: FARC (HROD-Presidency of Colombia)
                iv2.p_strat, # Treatment 1: Protests
                iv3.s_strat, # Treatment 2: Sanctuary
                iv4b.v_strat, # Treatment 3: Violent self-protection
                cv1.lpop, # Control V1: Population
                cv2.rur_p, # Control V2: Rurality
                cv3c.c_lsh1, # Control V3c1: Shared votes for the Left at Councils - Manual
                cv4.nbi, # Control V4: Unsatisfied Basic Needs
                cv5c.1.bd_o, # Control V5: OMC (Updated) - Balance of military dispute
                cv6b.1.ce_o_r, # Control V6: OMC (Updated) - Rates of conflict events.
                cv7b.3.pgf_cd_o_r, #Control V7: OMC (Updated) Rates of civilians killed by PGF
                cv8.2.no_p_str, # Control V8: Other strategies but protest
                cv8.4b.no_v_str, # Control V9: Other strategies but violent self-protection
                cv8.3.no_s_str, # Control V10: Other strategies but sanctuary
                median_c_lsh_m, # Median Vote for the Left at Councils - Manual
                reb_ru # Rebel recruitment (raw number)
  ) 

save.dta13(m_data_f2, file="Ortega(2024)-DataAnalysis_Updated(09302024).dta")
  # Checked

write_xlsx(m_data_f, "Ortega(2024)-AllVariables_Updated(09302024).xlsx")

```

End of this part
